* Open a fresh named text log for this script, closing any leftover one first
cap log close sublog
log using "$projdir/log/5_qwi_build.txt", replace text name(sublog)

*-------------------------------------------------------------------------------
* Description: Clean and deseasonalize QWI data
* Author: Denis Sosinskiy
*
* Updated: August 28, 2023
*-------------------------------------------------------------------------------
* Start from an empty session
clear all

* Sample window as Stata quarterly dates (quarters elapsed since 1960q1):
* yq(2009, 4) evaluates to 199 and yq(2022, 2) evaluates to 249
local start_tm = yq(2009, 4)
local end_tm = yq(2022, 2)
* Number of quarters in the window, endpoints included
local tot_tm = `end_tm' - `start_tm' + 1
local endqtrlab "2022Q2"

* Read the raw QWI extract
import delimited using "$projdir/dta/build/src/qwi_raw_08102023.csv", clear

* Quarterly date built from the year and quarter columns
quietly generate tm = yq(year, quarter)
format %tq tm

* 0/1 indicators for each analysis sample, defined by age-group and sex codes
quietly generate samp_allage = agegrp == "A00" & sex == 0
quietly generate samp_f      = agegrp == "A00" & sex == 2
quietly generate samp_teen   = agegrp == "A01" & sex == 0
quietly generate samp_youth  = agegrp == "A02" & sex == 0

* Harmonize variable names
quietly rename geography cty_fips
quietly rename industry  naics
quietly rename earnbeg   earn
quietly rename emp       empbeg

* Drop observations whose s* flag columns take any of the listed codes
* (presumably QWI status flags for unavailable/suppressed/distorted cells --
* confirm against the QWI schema)
foreach flag of varlist searnbeg semp sempend ssep ssepbeg {
	drop if inlist(`flag', -2, -1, 5, 9, 11)
}

* Require a valid county code and non-missing values for every key variable
quietly drop if missing(cty_fips, earn, empbeg, empend, sep, sepbeg) | cty_fips == 0

* Sanity check: none of the key variables is missing from here on
assert !missing(cty_fips, earn, empbeg, empend, sep, sepbeg)

* Average employment: midpoint of empbeg and empend
quietly generate employment = (empbeg + empend) / 2

* Convert earnings to weekly
* NOTE(review): divides by 4; confirm the source earnings unit and that
* 4 weeks per period is the intended approximation
quietly replace earn = earn / 4

* Separation rates, relative to average employment
quietly generate sepr    = sepbeg / employment
quietly generate sepnewr = (sep - sepbeg) / employment

* County-quarter total employment: take employment from the naics == 0,
* all-age row and spread it to every row of the same county-quarter
tempvar all_ind
generate `all_ind' = employment if naics == 0 & samp_allage == 1
bysort cty_fips tm: egen emp10 = max(`all_ind')
drop if missing(emp10)
quietly drop `all_ind'

* The naics == 0 rows are no longer needed once emp10 is attached
drop if naics == 0

* Keep only the sample of interest (selected through local s)
local s = "allage"
keep if samp_`s' == 1

* Restrict to desired time-period and keep balanced panel
* A county is kept only if it has exactly one row for each of the tot_tm
* quarters between start_tm and end_tm.
qui keep if inrange(tm, `start_tm', `end_tm')
bysort cty_fips: gen count = _N
qui keep if count == `tot_tm'

* The row count alone cannot tell a complete series from one with duplicated
* quarters plus gaps, so verify that county-quarter uniquely identifies the
* retained observations (stops with an error otherwise).
isid cty_fips tm

* Generate quarter identifier (1-4) used for the seasonal adjustment
qui gen qrt = quarter(dofq(tm))

* Deseasonalize variables
* For each outcome, remove the county-specific quarter-of-year effect and add
* back the county mean so the adjusted series keeps its original level.
* The residual from a within-county regression on a full set of quarter
* dummies equals the deviation from the county-by-quarter mean, so the group
* means are computed directly instead of running one regression per county.
* Creates resid_<var> and avg_<var>, and overwrites <var> with the adjusted
* series.
foreach y in employment earn sepr sepnewr {
	* County-by-quarter mean (fitted value of the seasonal regression)
	tempvar cellmean
	qui bysort cty_fips qrt: egen double `cellmean' = mean(`y')

	* Seasonal residual
	qui gen resid_`y' = `y' - `cellmean'
	qui drop `cellmean'

	* County mean over the whole window
	qui bysort cty_fips: egen avg_`y' = mean(`y')

	replace `y' = avg_`y' + resid_`y'
}

* Leave the data ordered by county and quarter
sort cty_fips tm

* Save
* The file name follows the sample chosen through local s (set where the
* sample of interest is selected), so a different sample does not overwrite
* the all-age output.
qui save "$projdir/dta/build/cln/qwi_`s'.dta", replace

* Close log file
log close sublog
